# heading ----
# Reset the workspace: remove every object except functions (lsf.str) and
# character variables (ls.str(mode = "character")).
rm(
	list = setdiff(
		x = ls(),
		y = c(lsf.str(), ls.str(mode = "character"))
	)
)

# Set the LANG environment variable for this session.
# NOTE(review): presumably to get English-language messages - confirm.
Sys.setenv(LANG = "en")

# Emit a run banner (date, authors, description, R version), one item per line.
message(
	paste(
		c(
			paste0("Date: ", format(Sys.Date(), "%d %B %Y")),
			paste0("By: ", "Cedric Chambru & Paul Maneuvrier-Hervieu"),
			paste0("Description: ", "code - compute descriptive statistics on HiSCoD"),
			paste0("Version of R used: ", R.Version()$version.string)
		),
		collapse = "\n"
	)
)


# load data: HiSCoD ----
# Read the HiSCoD data base from the RDS file located under `dir_data`.
# `dir_data` is not defined in this script; it is pasted as-is in front of the
# file name, so it is expected to end with a path separator.
df <- dir_data %>%
	paste0("db_hiscod_rds_v1_en.rds") %>%
	readRDS(file = .)


# Table 1: Overview of the main existing data sets included in HiSCoD ----
# Count the observations contributed by each pre-existing data set.
# The originating data set is recognised from a tag contained in
# `id_riot_original_database`, except for Matteo Tiratelli's data, which is
# recognised from the author's name in `bibliography_1`.
df %>%
	# create variable: dataset_name
	# (rows matching no condition get NA, the default of case_when)
	dplyr::mutate(
		dataset_name = dplyr::case_when(
			stringr::str_detect(id_riot_original_database, "JN_") ~
				"Jean Nicolas",
			stringr::str_detect(id_riot_original_database, "ALF_") ~
				"Aurélien Lignereux",
			stringr::str_detect(id_riot_original_database, "SWING_") ~
				"Michael Holland",
			stringr::str_detect(id_riot_original_database, "SCI_") ~
				"Samuel Cohn",
			stringr::str_detect(id_riot_original_database, "BOH_") ~
				"John Bohstedt",
			stringr::str_detect(id_riot_original_database, "EPMH_") ~
				"Paul Maneuvrier-Hervieu",
			stringr::str_detect(bibliography_1, "Tiratelli") ~
				"Matteo Tiratelli"
		)
	) %>%
	# select data: retain observations from existing databases
	dplyr::filter(
		!is.na(dataset_name)
	) %>%
	# create variable: dataset_order (row order used when printing the table)
	# namespace-qualified like every other dplyr call in this script, so the
	# step does not depend on dplyr being attached
	dplyr::mutate(
		dataset_order = dplyr::case_match(
			.x = dataset_name,
			"Jean Nicolas" ~ 1,
			"Aurélien Lignereux" ~ 2,
			"Michael Holland" ~ 3,
			"Samuel Cohn" ~ 4,
			"John Bohstedt" ~ 5,
			"Paul Maneuvrier-Hervieu" ~ 6,
			"Matteo Tiratelli" ~ 7
		)
	) %>%
	# total numbers of observations per data set
	dplyr::summarise(
		n = dplyr::n(),
		.by = c(dataset_order, dataset_name)
	) %>%
	dplyr::arrange(
		dataset_order
	) %>%
	# n = nrow(.) requests that all rows be shown
	print(
		n = nrow(.),
		zero.print = "0",
		na.print = NULL
	)

# NOTES
# Print a note stating that the counts computed above differ from those
# reported in Table 1, and why (duplicates were excluded in HiSCoD).
# The sentences are joined with paste()'s default single-space separator.
print(
	paste(
		"Please note that the numbers reported here differ from those reported in Table 1.",
		"Table 1 reports numbers of social conflicts as indicated by the authors.",
		"However, in HiSCoD, we excluded duplicates within a data set, but also between data sets,",
		"yielding overall a slightly lower number of observations"
	)
)


# Table 2: Classification used in the HiSCoD database ----
# Tally the observations for each (numeric code, label) pair of the HiSCoD
# classification, order the result by numeric code, and print it.
df %>%
	dplyr::summarise(
		.by = c(riot_type_hiscod_num, riot_type_hiscod),
		n = dplyr::n()
	) %>%
	dplyr::arrange(riot_type_hiscod_num) %>%
	# n = nrow(.) requests that all rows be shown
	print(n = nrow(.), zero.print = "0", na.print = NULL)


# clean workspace ----
# Remove every object except functions (lsf.str) and character variables
# (ls.str(mode = "character")).
rm(list = setdiff(ls(), c(lsf.str(), ls.str(mode = "character"))))

# Delete the R history file from the working directory, but only when it
# exists: file.remove() raises a warning if the file is missing.
if (file.exists(".Rhistory")) {
	file.remove(".Rhistory")
}

